### Principal Component Analysis

# library
library(tidyverse)    # tidyverse
library(corrr)        # correlation 
library(ggcorrplot)   # correlation
library(psych)        # PCA

#####################################
### PCA
#####################################

# Set-up: clear all visible objects from the current environment, switch to
# the project directory and restore the objects saved in the master file
# (expected to provide `alldata`). Only rows with year == 2022 are kept.
# NOTE(review): rm(list = ls()) and the machine-specific setwd() path make
# this script non-portable; consider a project-relative workflow instead.
rm(list = ls())
setwd("C:/Users/timba/OneDrive - Universität Bayreuth (1)/Uni/Research General/Inequality and Protests/empirics")
load("master/alldata.rdata")
alldata <- subset(alldata, subset = year == 2022)

### PCA Resource Dimensions
# Column names of the indicators, grouped into three sets.
network <- c("ln_online_network", "clustering_county", "support_ratio_county")
orga <- c("org_ngo_pc", "org_char_pc", "civic_organizations_county")
monetary <- c(
  "log_median_household_income",
  "other_income",
  "log_per_capita_income"
)
# Full indicator set, in the order network -> orga -> monetary. It is derived
# from the three groups so the combined list cannot drift out of sync with
# them when a variable is added, removed or renamed.
rmp <- c(network, orga, monetary)

### PCA with relevant variables
### resources
# Keep fips, year and the indicator columns listed in `rmp`.
# all_of() states explicitly that `rmp` is an external character vector of
# column names; a bare `rmp` inside select() is deprecated in tidyselect and
# would silently pick a column called "rmp" if one existed.
panel_rmp <- dplyr::select(alldata, fips, year, all_of(rmp))
# standardize
# scale() returns a one-column matrix; as.numeric() keeps the standardized
# columns as plain numeric vectors instead of matrix columns.
panel_rmp <- panel_rmp %>%
  mutate(across(all_of(rmp), function(x) as.numeric(scale(x))))
# drop nas
# NOTE: the standardization above uses all non-missing values of each column,
# so after dropping incomplete rows the columns are only approximately
# mean 0 / sd 1. The correlation matrix (and hence the PCA) is unaffected.
panel_rmp <- na.omit(panel_rmp)
# PCA
# Complete-case indicator matrix used for both the plot and the PCA.
pca_data <- as.data.frame(panel_rmp[rmp])
# Correlation matrix, computed once and reused for the plot.
corr <- cor(pca_data)
ggcorrplot(corr, method = "square", type = "lower")
# Pass the raw data rather than `corr`: principal() can only compute component
# scores (pca$scores) from raw data. The loadings are identical because the
# data contain no missing values at this point. `scores` is spelled out in
# full instead of relying on partial argument matching of `score`.
pca <- principal(pca_data, nfactors = 3, rotate = "promax", scores = TRUE)
pca$loadings

